From 69921554ab9e0c18823b6b2be85f965a34a9a5fa Mon Sep 17 00:00:00 2001
From: =?utf8?q?=C3=98yvind=20Kol=C3=A5s?= <pippin@gimp.org>
Date: Fri, 18 Nov 2016 23:38:12 +0100
Subject: [PATCH] fast-float: implement linear interpolation in LUT

---
 extensions/fast-float.c | 140 ++++++++++++++++++++++++++++++++--------
 1 file changed, 114 insertions(+), 26 deletions(-)

diff --git a/extensions/fast-float.c b/extensions/fast-float.c
index 7393645..737fa83 100644
--- a/extensions/fast-float.c
+++ b/extensions/fast-float.c
@@ -39,8 +39,9 @@ typedef struct BablLookup
   BablLookupFunction function;
   void              *data;
   int               shift;
-  uint32_t            positive_min, positive_max, negative_min, negative_max;
-  uint32_t            bitmask[babl_LOOKUP_MAX_ENTRIES/32];
+  uint32_t          positive_min, positive_max, negative_min, negative_max;
+  uint32_t          bitmask[babl_LOOKUP_MAX_ENTRIES/32];
+  int               entries;
   float             table[];
 } BablLookup;
 
@@ -54,44 +55,73 @@ static BablLookup *babl_lookup_new (BablLookupFunction  function,
 static void        babl_lookup_free      (BablLookup         *lookup);
 #endif
 
+#include <string.h>
 
 static inline float
 babl_lookup (BablLookup *lookup,
-             float      number)
+             float       number)
 {
-  union
-  {
-    float   f;
-    uint32_t i;
-  } u;
+  union { float   f; uint32_t i; } u;   /* bit pattern of number     */
+  union { float   f; uint32_t i; } ub;  /* float rebuilt for key i+1 */
+  union { float   f; uint32_t i; } ua;  /* float rebuilt for key i   */
+
   uint32_t i;
+  float dx = 0.0f; /* fraction of the way from ua.f to ub.f */
 
   u.f = number;
-  i = (u.i << LSHIFT )>> lookup->shift;
+  i = (u.i << LSHIFT ) >> lookup->shift;
+  /* ua/ub undo the two shifts for key i and key i+1, low bits zeroed */
+  if (i > lookup->positive_min && i < lookup->positive_max)
+  {
+    ua.i = ((i) << lookup->shift)    >> LSHIFT;
+    ub.i = ((i+ 1) << lookup->shift) >> LSHIFT;
 
-  if (i > lookup->positive_min &&
-      i < lookup->positive_max)
     i = i - lookup->positive_min;
-  else if (i > lookup->negative_min &&
-           i < lookup->negative_max)
+  }
+  else if (i > lookup->negative_min && i < lookup->negative_max)
+  {
+
+    ua.i = ((i) << lookup->shift)    >> LSHIFT;
+    ub.i = ((i+ 1) << lookup->shift) >> LSHIFT;
+
     i = i - lookup->negative_min + (lookup->positive_max - lookup->positive_min);
+  }
   else
+  {
     return lookup->function (number, lookup->data);
+  }
+  /* copy the input's two top bits back (presumably LSHIFT is 2 - confirm) */
+  {
+    uint32_t bm = u.i & 0xC0000000u; /* hex: 0b literals are a GNU extension */
+    ua.i |= bm;
+    ub.i |= bm;
+  }
+  dx = (u.f-ua.f) / (ub.f - ua.f);
+
+  {
 
   if (!(lookup->bitmask[i/32] & (1UL<<(i & 31))))
     {
-      /* XXX: should look up the value in the middle of the range
-       *      that yields a given value,
-       *
-       *      potentially even do linear interpolation between
-       *      the two neighbour values to get away with a tiny
-       *      lookup table.. 
-       */
-      lookup->table[i]= lookup->function (number, lookup->data);
+      lookup->table[i]= lookup->function (ua.f, lookup->data);
+      lookup->bitmask[i/32] |= (1UL<<(i & 31));
+    }
+  i++;  /* step to the entry for key i+1 */
+  if (i< lookup->entries-2)
+  {
+    if (!(lookup->bitmask[i/32] & (1UL<<(i & 31))))
+    {
+      lookup->table[i]= lookup->function (ub.f, lookup->data);
       lookup->bitmask[i/32] |= (1UL<<(i & 31));
     }
 
-  return lookup->table[i];
+    return lookup->table[i-1] * (1.0-dx) +
+           lookup->table[i] * (dx);
+  }
+  else  /* next entry is past the entries-2 limit: no interpolation */
+  {
+    return lookup->table[i-1];
+  }
+  }
 }
 
 static BablLookup *
@@ -215,6 +245,9 @@ babl_lookup_new (BablLookupFunction function,
   lookup->function = function;
   lookup->data = data;
 
+  lookup->entries = (positive_max-positive_min)+
+                    (negative_max-negative_min);
+
   return lookup;
 }
 
@@ -292,6 +325,50 @@ conv_rgbaF_linear_rgbAF_gamma (unsigned char *src,
   return samples;
 }
 
+
+/* Convert linear RGBA float pixels to premultiplied 8-bit pixels, passing
+ * each colour channel through linear_to_gamma_2_2_lut() before scaling.
+ *
+ * Every channel is clamped to 0..255 before narrowing: converting an
+ * out-of-range (or NaN) float to uint8_t is undefined behaviour, and
+ * float input is not guaranteed to stay within 0..1.  Returns samples.
+ */
+static INLINE long
+conv_rgbaF_linear_rgbA8_gamma (unsigned char *src,
+                               unsigned char *dst,
+                               long           samples)
+{
+   float *fsrc = (float *) src;
+   uint8_t *cdst = (uint8_t *) dst;
+   long n = samples;  /* long, so large sample counts are not truncated */
+
+   while (n-- > 0)
+     {
+       float alpha = fsrc[3];
+       int   c;
+
+       if (!(alpha > 0.0f))  /* zero, negative or NaN alpha: transparent */
+       {
+         cdst[0] = cdst[1] = cdst[2] = cdst[3] = 0;
+       }
+       else
+       {
+         float balpha = (alpha < 1.0f ? alpha : 1.0f) * 0xff;
+         for (c = 0; c < 3; c++)
+         {
+           float v = linear_to_gamma_2_2_lut (fsrc[c]) * balpha + 0.5f;
+           cdst[c] = v >= 255.0f ? 0xff : v > 0.0f ? (uint8_t) v : 0;
+         }
+         cdst[3] = (uint8_t) (balpha + 0.5f);
+       }
+       fsrc += 4;
+       cdst += 4;
+     }
+  return samples;
+}
+
+
+
 static INLINE long
 conv_rgbAF_linear_rgbAF_gamma (unsigned char *src, 
                                unsigned char *dst, 
@@ -446,6 +523,16 @@ init (void)
     babl_component ("B'a"),
     babl_component ("A"),
     NULL);
+
+  const Babl *rgbA8_gamma = babl_format_new (
+    babl_model ("R'aG'aB'aA"),
+    babl_type ("u8"),
+    babl_component ("R'a"),
+    babl_component ("G'a"),
+    babl_component ("B'a"),
+    babl_component ("A"),
+    NULL);
+
   const Babl *rgbF_linear = babl_format_new (
     babl_model ("RGB"),
     babl_type ("float"),
@@ -465,8 +552,8 @@ init (void)
     float f;
     float a;
 
-    fast_pow = babl_lookup_new (core_lookup, NULL, 0.0, 1.0,   0.00005);
-    fast_rpow = babl_lookup_new (core_rlookup, NULL, 0.0, 1.0, 0.00005);
+    fast_pow = babl_lookup_new (core_lookup, NULL, 0.0, 1.0,   0.00033);
+    fast_rpow = babl_lookup_new (core_rlookup, NULL, 0.0, 1.0, 0.00033);
 
     for (f = 0.0; f < 1.0; f+= 0.0000001)
       {
@@ -480,10 +567,11 @@ init (void)
 
   o (rgbAF_linear, rgbAF_gamma);
   o (rgbaF_linear, rgbAF_gamma);
+  o (rgbaF_linear, rgbA8_gamma);
   o (rgbaF_linear, rgbaF_gamma);
   o (rgbaF_gamma,  rgbaF_linear);
-  o (rgbF_linear, rgbF_gamma);
-  o (rgbF_gamma,  rgbF_linear);
+  o (rgbF_linear,  rgbF_gamma);
+  o (rgbF_gamma,   rgbF_linear);
 
   return 0;
 }
-- 
2.30.2